# Fall back to the parameters declared in the document's YAML front matter
# when no `params` object exists yet.
if (!exists("params")) {
  params <- rmarkdown::yaml_front_matter(
    '~/git/0.RPackages/LEEF/measurements/LEEF.toc/inst/Import and filter TOC LEEF-2.qmd'
  )$params
}

# Packages attached for the remainder of the document.
library(LEEF.analysis)
library(LEEF.measurement.toc)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Warning in file.copy(file.path(input, "..", "00.general.parameter", "."), :
problem copying
/Volumes/LEEF/0.TOC/LEEF-2//0.raw.data/../00.general.parameter/. to
/Volumes/LEEF/0.TOC/LEEF-2//1.pre-processed.data/toc/.: No such file or
directory
As the number of additional samples as well as the cv were different for some samples, we have to re-calculate conc and cv. The values which will be used are
samples: 2
extra_samples: 2
max_cv: 2
In case there are more extra samples, the last ones will be discarded.
Read data and discard more than 2 extra samples
Code
# Build one combined table `toc` from the `$data` table of every element of
# `data`.
#
# For each element:
#   * only the first `max_col` columns are kept (or all of them when the table
#     has fewer columns);
#   * when the table has fewer than `max_col` columns, every expected
#     `conc_<k>` column (k = 1 .. extra_samples + samples) that is absent is
#     added and filled with NA, so the pieces can be row-bound.
#
# `data`, `max_col`, `extra_samples` and `samples` must already be defined.
toc <- lapply(
  data,
  function(x) {
    n_keep <- min(max_col, ncol(x$data))
    # seq_len() stays empty for zero columns; drop = FALSE keeps a table even
    # when only one column is retained.
    result <- x$data[, seq_len(n_keep), drop = FALSE]

    if (ncol(x$data) < max_col) {
      sn <- paste0("conc_", seq_len(extra_samples + samples))
      snm <- sn[!(sn %in% names(result))]
      # A single assignment creates all missing columns at once; nothing is
      # done when no column is missing.
      if (length(snm) > 0) {
        result[, snm] <- NA
      }
    }

    result
  }
) |>
  do.call(what = rbind)

# Running row number used as an identifier.
toc$id <- seq_len(nrow(toc))
Remove missing inj_type
The following measurement files contain measurements whose inj_type is missing or NULL. These cannot be analysed and need to be excluded.
In addition to the analysed samples, two more validation and null samples (H2O) were taken. Upon further consideration, these were considered as not useful and excluded from the analysis. They are saved as toc_val_samples.csv.
One can see for TC two peaks clearly separated. The values smaller than 5 will be set to NA as they are unrealistically low and can be linked to measuring errors in the machine.
Plot the previous plot zoomed in to conc <= 10 for each bottle
One can see for TC two peaks clearly separated. The values smaller than 0.3 will be set to NA as they are unrealistically low and can be linked to measuring errors in the machine.
Plot the previous plot zoomed in to conc <= 3 for each bottle
Recalculate conc and cv for all measurements except TOC
As the number of additional samples as well as the cv were different for some samples, we have to re-calculate conc and cv. The values which will be used are:

- samples: 2
- extra_samples: 2
- max_cv: 2

In case there are more extra samples, the last ones will be discarded.
As we are re-calculating TOC later, we can exclude the TOC values here.
Writing data...
! Be careful, path_to_parquet should be a file name, using : /Volumes/LEEF/0.TOC/LEEF-2/parquet/experimental_design//experimental_design.parquet
Writing data...
✔ Data are available in parquet file under /Volumes/LEEF/0.TOC/LEEF-2/parquet/experimental_design//experimental_design.parquet
Writing data...
Code
# Keep a copy of `toc` as it is at this point, then (re)assign a running row
# number as identifier.
toc_original <- toc
# seq_len() yields an empty vector for a zero-row table, whereas 1:0 would
# produce c(1, 0) and make the assignment fail.
toc$id <- seq_len(nrow(toc))
Plot after filtering
Now let’s look at the plots of the measurements per bottle per timestep
And some plots of the duplicate concentration values only
Code
# Extract duplicate measurements, i.e. combinations of timestamp, bottle and
# type that occur more than once, and summarise each of them as one row
# holding the smaller (`mic`) and larger (`mac`) concentration together with
# their difference (`mdiff`).
#
# Only the rows with the lowest and the highest `id` of each duplicated
# combination are compared; rows in between are not considered.
dat <- arrow_read_toc(db = params$parquet) %>%
  collect()

# Defined up front so that the `nrow(x)` checks in the plotting chunks also
# work when no data could be read.
x <- dplyr::tibble()

if (nrow(dat) > 0) {
  dat$id <- seq_len(nrow(dat))

  # One row per duplicated combination with the first and last row id.
  dup_groups <- dat %>%
    filter(!is.na(bottle)) %>%
    group_by(timestamp, bottle, type) %>%
    summarise(
      min_id = min(id),
      max_id = max(id),
      n = n(),
      .groups = "drop"
    ) %>%
    filter(n > 1)

  ids <- unique(c(dup_groups$min_id, dup_groups$max_id))

  # The grouping columns are carried into the result automatically. Listing
  # them again inside summarise() made it return several rows per group
  # (deprecated since dplyr 1.1.0) and repeated every duplicate pair.
  x <- dat %>%
    filter(id %in% ids) %>%
    group_by(day, timestamp, bottle, type) %>%
    summarise(
      mic = min(concentration),
      mac = max(concentration),
      .groups = "drop"
    ) %>%
    mutate(mdiff = mac - mic)
}
Warning: There were 2 warnings in `summarise()`.
The first warning was:
ℹ In argument: `mic = min(concentration)`.
Caused by warning in `min()`:
! no non-missing arguments to min; returning Inf
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
dplyr 1.1.0.
ℹ Please use `reframe()` instead.
ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
always returns an ungrouped data frame and adjust accordingly.
TN Duplicates
Code
# Scatter plot of the smaller (`mic`) against the larger (`mac`) concentration
# of the TN rows in `x`, with marginal histograms.
# The guard tests the TN subset itself, so the plotting code is never handed
# an empty data set when `x` only holds other types.
# NOTE(review): ggMarginal() is assumed to come from ggExtra, attached
# elsewhere in the document - confirm.
if (nrow(x) > 0 && any(x$type == "TN", na.rm = TRUE)) {
  pl <- x %>%
    filter(type == "TN") %>%
    ggplot2::ggplot(ggplot2::aes(x = mic, y = mac, colour = type)) +
    ggplot2::geom_point() +
    ggplot2::xlab("Smaller concentration Value") +
    ggplot2::ylab("Larger concentration Value")
  ggMarginal(pl, type = "histogram")
}
TC Duplicates
Code
# Scatter plot of the smaller (`mic`) against the larger (`mac`) concentration
# of the TC rows in `x`, with marginal histograms.
# The guard tests the TC subset itself, so the plotting code is never handed
# an empty data set when `x` only holds other types.
# NOTE(review): ggMarginal() is assumed to come from ggExtra, attached
# elsewhere in the document - confirm.
if (nrow(x) > 0 && any(x$type == "TC", na.rm = TRUE)) {
  pl <- x %>%
    filter(type == "TC") %>%
    ggplot2::ggplot(ggplot2::aes(x = mic, y = mac, colour = type)) +
    ggplot2::geom_point() +
    ggplot2::xlab("Smaller concentration Value") +
    ggplot2::ylab("Larger concentration Value")
  ggMarginal(pl, type = "histogram")
}
IC Duplicates
Code
# Scatter plot of the smaller (`mic`) against the larger (`mac`) concentration
# of the IC rows in `x`, with marginal histograms.
# The guard tests the IC subset itself, so the plotting code is never handed
# an empty data set when `x` only holds other types.
# NOTE(review): ggMarginal() is assumed to come from ggExtra, attached
# elsewhere in the document - confirm.
if (nrow(x) > 0 && any(x$type == "IC", na.rm = TRUE)) {
  pl <- x %>%
    filter(type == "IC") %>%
    ggplot2::ggplot(ggplot2::aes(x = mic, y = mac, colour = type)) +
    ggplot2::geom_point() +
    ggplot2::xlab("Smaller concentration Value") +
    ggplot2::ylab("Larger concentration Value")
  ggMarginal(pl, type = "histogram")
}
TOC Duplicates
Code
# Scatter plot of the smaller (`mic`) against the larger (`mac`) concentration
# of the TOC rows in `x`, with marginal histograms.
# The guard tests the TOC subset itself, so the plotting code is never handed
# an empty data set when `x` only holds other types.
# NOTE(review): ggMarginal() is assumed to come from ggExtra, attached
# elsewhere in the document - confirm.
if (nrow(x) > 0 && any(x$type == "TOC", na.rm = TRUE)) {
  pl <- x %>%
    filter(type == "TOC") %>%
    ggplot2::ggplot(ggplot2::aes(x = mic, y = mac, colour = type)) +
    ggplot2::geom_point() +
    ggplot2::xlab("Smaller concentration Value") +
    ggplot2::ylab("Larger concentration Value")
  ggMarginal(pl, type = "histogram")
}